; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s

target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-unknown"

; Exercises `frem` on <4 x half> operands for the i386 / pentium4 target at -O0.
; The expected assembly below shows the operation being scalarized: for each of
; the four lanes, both half operands are widened via __extendhfsf2 (x87 result,
; spilled with fstpt), the remainder is computed by a call to fmodf with both
; floats passed on the stack, and the result is narrowed back via __truncsfhf2.
; That is 8 extend calls, 4 fmodf calls and 4 truncate calls in total; the four
; half results are then repacked into xmm0 with punpcklwd/unpcklps.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
; CHECK-LABEL: doTheTestMod:
; CHECK:       # %bb.0: # %Entry
; CHECK-NEXT:    subl $140, %esp
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, %xmm6
; CHECK-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movaps %xmm0, %xmm3
; CHECK-NEXT:    psrlq $48, %xmm3
; CHECK-NEXT:    movaps %xmm0, %xmm2
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT:    psrld $16, %xmm0
; CHECK-NEXT:    movaps %xmm6, %xmm7
; CHECK-NEXT:    movaps %xmm6, %xmm4
; CHECK-NEXT:    psrlq $48, %xmm4
; CHECK-NEXT:    movaps %xmm6, %xmm5
; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
; CHECK-NEXT:    psrld $16, %xmm6
; CHECK-NEXT:    pextrw $0, %xmm7, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm6, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm5, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm4, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm3, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm2, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm1, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
; CHECK-NEXT:    # xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    movaps %xmm0, %xmm3
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    addl $140, %esp
; CHECK-NEXT:    retl
Entry:
  ; Both vector arguments are stored to stack slots and immediately reloaded.
  ; NOTE(review): presumably this mirrors unoptimized front-end output from the
  ; original reproducer; it is not required by the frem itself - confirm before
  ; simplifying, since any IR change here alters the -O0 output asserted above.
  %x = alloca <4 x half>, align 8
  %y = alloca <4 x half>, align 8
  store <4 x half> %0, ptr %x, align 8
  store <4 x half> %1, ptr %y, align 8
  %2 = load <4 x half>, ptr %x, align 8
  %3 = load <4 x half>, ptr %y, align 8
  ; Element-wise floating-point remainder of the reloaded vectors; this is the
  ; operation whose lowering the assertions above pin down.
  %4 = frem <4 x half> %2, %3
  ret <4 x half> %4
}

